{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "# LinearRegression 实验"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "### 一、原方案計算數據集，使用numpy回歸計算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "usage: __main__.py [-h] [--batch BATCH] [--epoch EPOCH] [--rate RATE]\n",
      "__main__.py: error: unrecognized arguments: -f /run/user/1000/jupyter/kernel-2da79057-b38f-485f-ad05-1ac8ecebeb6b.json\n"
     ]
    },
    {
     "ename": "SystemExit",
     "evalue": "2",
     "output_type": "error",
     "traceback": [
      "An exception has occurred, use %tb to see the full traceback.\n",
      "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/cgh/.linuxbrew/opt/python3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2889: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
      "  warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
     ]
    }
   ],
   "source": [
    "#!/usr/bin/env python3\n",
    "\n",
    "import random\n",
    "import argparse\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "# dataset file\n",
    "DATASET_FILE = \"LinearRegressionDataSet_2.txt\"\n",
    "\n",
    "# default learning rate\n",
    "ALPHA = 0.01\n",
    "# default min batch size\n",
    "MIN_BATCH = 200\n",
    "# default learning times\n",
    "LEARNING_TIMES = 4500\n",
    "\n",
    "\n",
    "def generate_min_batch(dataset, batch_size):\n",
    "    \"\"\"\n",
    "    Generate Min Batch.\n",
    "    \"\"\"\n",
    "    dataset_shape = dataset.shape\n",
    "    if dataset_shape[0] < batch_size:\n",
    "        batch_size = dataset_shape[0]\n",
    "\n",
    "    row_indexes = [i for i in range(dataset_shape[0])]\n",
    "    random.shuffle(row_indexes)\n",
    "    random_row_indexes = row_indexes[:batch_size]\n",
    "    hsplit_indice = dataset_shape[1] - 1\n",
    "    random_dataset = (dataset[random_row_indexes, :hsplit_indice],\n",
    "                      dataset[random_row_indexes, hsplit_indice:])\n",
    "    return random_dataset\n",
    "\n",
    "\n",
    "def hypothesis(input_x, θ):\n",
    "    \"\"\"\n",
    "    Hypothesis function 假设函数\n",
    "\n",
    "    hθ(x) = θ.T * x\n",
    "    \"\"\"\n",
    "    res = np.dot(input_x, θ.T)\n",
    "    return res\n",
    "\n",
    "\n",
    "def deviation(input_x, output_y, θ):\n",
    "    \"\"\"\n",
    "    Deviation function 代价/偏差函数\n",
    "    \"\"\"\n",
    "    res = sum([pow(hypothesis(x, θ) - y, 2) for x, y in zip(input_x, output_y)])\n",
    "    return res / (2 * input_x.shape[0])\n",
    "\n",
    "\n",
    "def gradientDescent(dataset, batch_size, learning_rate, learning_times):\n",
    "    print(\"------Start gradient descent------\")\n",
    "    process = 0\n",
    "\n",
    "    hsplit_indice = (dataset.shape)[1] - 1\n",
    "    θ = np.random.rand(1, hsplit_indice)\n",
    "    # print(θ)  # [[ 0.92081042  0.78857252]]\n",
    "    # print(θ.shape)  # (1, 2)\n",
    "\n",
    "    _θ = θ.copy()\n",
    "    while process < learning_times:\n",
    "        batch_data = generate_min_batch(dataset, batch_size)\n",
    "        input_x = batch_data[0]\n",
    "        output_y = batch_data[1]\n",
    "\n",
    "        for i in range(θ.shape[1]):\n",
    "            derivative = np.dot(\n",
    "                (np.array([hypothesis(row, θ) for row in input_x]) - output_y).T,\n",
    "                input_x[:, i:i+1])\n",
    "            avg_derivative = derivative / batch_size\n",
    "            # θ = θ - αJ(θ)'\n",
    "            _θ[0, i] = θ[0, i] - learning_rate * avg_derivative\n",
    "\n",
    "        θ = _θ\n",
    "        # calculate deviation\n",
    "        batch_test_data = generate_min_batch(dataset, batch_size)\n",
    "        current_deviation = deviation(batch_test_data[0], batch_test_data[1], θ)\n",
    "        if (process % 10 == 0):\n",
    "            print(\"step %d, deviation %2f\" % (process, current_deviation))\n",
    "            print(\"θ: %s\" % (θ))\n",
    "        process += 1\n",
    "\n",
    "    print(\"θ value is: %s\" % (θ))\n",
    "    print(\"------End gradient descent------\")\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    parser = argparse.ArgumentParser()\n",
    "    parser.add_argument('--batch', type=int, default=MIN_BATCH)\n",
    "    parser.add_argument('--epoch', type=int, default=LEARNING_TIMES)\n",
    "    parser.add_argument('--rate', type=float, default=ALPHA)\n",
    "\n",
    "    args = parser.parse_args()\n",
    "    dataset = np.loadtxt(DATASET_FILE, dtype=np.float64)\n",
    "    gradientDescent(dataset, args.batch, args.rate, args.epoch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "### 二、自己的方案"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "#### 1、初步判斷樣本集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAG+FJREFUeJzt3XuQVeW55/Hvs/veNoJoHwExNhUuEY2aESF2qhBEIdaJ\nAUuj0d6BOUOlu0LGgDFhA1Uz6swxQXO8K1avhCRtSVS8pNBUJmAMSE16BwXFnABH4ahEPSCiTbBF\n7s/80bs9hAH26u59Xfv3qdq1L/0u9vMW+uuXd633XebuiIhI8YvluwAREckMBbqISEQo0EVEIkKB\nLiISEQp0EZGIUKCLiERE2kA3s1Fmtv6Ix24zm2NmA83seTPbnHo+JRcFi4jIsVlPrkM3szLgPWAc\n8F3gI3dfaGbzgFPcPZGdMkVEJJ2eTrlMAv7d3bcCU4G21OdtwLRMFiYiIj1T3sP23wQeS70+3d23\npV5vB04/1gFm1gw0A1RXV1/4uc99rjd1Fq3Dhw8Ti5XWqYpS63Op9RfU51x74403drp7fbp2oadc\nzKwS+A/gHHd/38x2ufuAI37e4e4nnEcfNWqUv/7666G+LypWrVrFhAkT8l1GTpVan0utv6A+55qZ\nrXP3Mena9eTXzRXAK+7+fur9+2Y2OPVlg4EdPS9TREQypSeBfj3/Od0C8CwwI/V6BrAsU0WJiEjP\nhQp0MzsJuBx45oiPFwKXm9lm4LLUexERyZNQJ0Xd/RPg1KM++5Cuq15ERKQAlNZpahGRCFOgi4hE\nhAJdROQEGhoaMDMmT55MEAT5LueEFOgiIkeZMmUKFRUVmBlbt24F4MCBA7S0tBR0qCvQRUSAZDLJ\nVVddRVVVFStWrODgwYPHbPf000/nuLLwerr0X0QkUpLJJPPmzWP16tWh2l999dVZrqj3FOgiUpLG\njRvHyy+/TNjtT8rKyli0aBHNzc1Zrqz3FOgiUlKCIOA73/kOhw8fDtX+ggsuYNGiRezbt6/g96/R\nHLqIRN7o0aOJxWLEYjFaWlrShnlFRQXjx4+nvb2dV199lYsvvjhHlfaNAl1EIimRSHDmmWcSi8XY\ntGkT7p52eqWqqoqmpib279/Piy++WDRB3k1TLiISKclkkhkzZrB58+bQx3RPqxRbgB9NgS4iRS8I\nAm655RY++ugj9u/fH+oYM+Oiiy5izZo1Wa4udzTlIiJFKwgChgwZQktLC9u3bw8V5t1z44cPH45U\nmING6CJShBKJBA8++CB79uwJ1b68vJxLL72U5cuXZ7my/FKgi0hRSCQS/OpXv2Lnzp3s3bs31DFn\nnXUWCxYsKOhrxzNJgS4iBSuZTDJ9+nS2bNkS+pjTTjuNa665hunTpxf9Sc6eUqCLSEGaMmUKK1as\nCNW2traWhoYGZs+eXTKj8WNRoItIwUgkEgRBwO7du0Ot5CwvL+f73/8+d9xxRw6qK3wKdBHJu3g8\nzhNPPHHcHQ6PVl1dzdVXX82jjz6a5cqKiy5bFJG8iMfjVFZWEovFWLJkSagwP/XUU2ltbeXTTz9V\nmB+DRugikjO9WcVZW1vLPffcU9Jz42FphC4iWRcEAYMHD6axsTFUmJeVldG/f3/mzp3LJ598ojAP\nKdQI3cwGAD8DzgUc+G/A68ATQAPwNnCtu3dkpUoRKTqJRIKHHnqIPXv2hNpzvKysjPr6em677TYF\neC+FnXK5D/idu19jZpVALbAAeMHdF5rZPGAekMhSnSJSROLxOEuWLAnVNhaLcf3112tOPAPSBrqZ\n9QfGA/8VwN33A/vNbCowIdWsDViFAl2kZAVBwOLFi6murg51O7fKykrmzJmjSw4zyNL9U8jMLgAC\nYCNwPrAOmA285+4DUm0M6Oh+f9TxzUAzQH19/YVLly7NaAcKXWdnJ3V1dfkuI6dKrc+l1l/4zz5v\n2LCB5cuX88orr/Dee++FOnbIkCEsWLCAc845J8tVZlY+/54nTpy4zt3HpG3Yven78R7AGOAgMC71\n/j7gfwO7jmrXke7PGjlypJealStX5ruEnCu1Ppdaf93dL7vsMi8rK3O6zqmlfcRiMR80aJC3trbm\nu/Rey+ffM7DW0+Sru4eaQ38XeNfdu/eZfIqu+fL3zWywu28zs8HAjjC/aUSkOAVBwK233sr27dvT\nnuQsKyvjyiuvZNCgQSW5p0q+pA10d99uZu+Y2Sh3fx2YRNf0y0ZgBrAw9bwsq5WKSF70dKvaqNz9\npxiFvcrlRmBJ6gqXN4F/ousa9qVmNhPYClybnRJFJNe6t6rdtWsXnZ2dadv379+fUaNGMXPmTF1y\nmEehAt3d19M1l360SZktR0TyKZFIcPfdd4feU8XMuPzyyyN/44hioZWiIiUuCAIaGhqIxWLceeed\nocK8X79+tLa2cvjwYYV5AdFeLiIlqvsk57Zt20K1Ly8vp7GxkYULF7Jv3z4mTJiQ3QKlxxToIiUk\nmUwya9Ys/vznP4fab7ympoaBAwfS1NT0dwuAVq1alcUqpbcU6CIlIAgCFixYwIcffhiqvZlxww03\naDl+kVGgi0RYb7arHTt2LGvWrEnfUAqOToqKREwQBJx66qmYWajtaquqqhg9ejStra24u8K8iGmE\nLhIByWSSefPmkUwmOXDgQKhjtMth9GiELlLEui85bGxsZPXq1aHCvKamhrlz53Lo0CGFecRohC5S\nhBKJBHfddReHDh0K1f7ISw61JD+6FOgiRSIIAu699162bNkSelpFKzlLiwJdpMDF43Eef/zx0KNx\ngKamJk2nlCAFukgBSiaTTJ06lQ8++CD0MbFYjDFjxugqlRKmk6IiBSQIAmpqamhsbAwd5mPHjsXd\nOXTokMK8xCnQRQpAIpGgoqKClpYW9u7dm7Z9dXU1TU1Num5c/o6mXETyJB6P8+STT3LgwIG0dwCC\nrimV8847TzePkONSoIvkUBAELF68mPXr17N///5Qx1RVVXH//ffrxhGSlgJdJAfi8ThPPfUU+/bt\nC33MgAED+O1vf6vRuISmQBfJonHjxvHSSy+Fbh+Lxfj85z9PW1ubglx6TCdFRTIsmUxyySWXYGah\nw3zEiBG0t7dz6NAh3njjDYW59IpG6CIZ0r2Sc9OmTaHal5WVMWzYMB555BEFuGSEAl2kDxKJBIsW\nLWLv3r2hb6w8bdo05s6dqxCXjFOgi/RCb24cMWLECM2NS1aFCnQzexv4GDgEHHT3MWY2EHgCaADe\nBq51947slClSGBKJBA8++CB79uwJ1b68vJzrrrtO+6pITvRkhD7R3Xce8X4e8IK7LzSzean3iYxW\nJ1IAuq8df+utt0Itxx86dChDhgxh5syZunZccqovUy5TgQmp123AKhToEhE9DXGAQYMGcdtttynE\nJW8szJJjM3sL6AAcaHX3wMx2ufuA1M8N6Oh+f9SxzUAzQH19/YVLly7NZP0Fr7Ozk7q6unyXkVPF\n3Ofbb7+dlStXht6qtra2lvPOO494PM4555yT5eoKRzH/HfdWPvs8ceLEde4+Jm1Dd0/7AM5IPf8D\n8BowHth1VJuOdH/OyJEjvdSsXLky3yXkXLH1ubW11c8++2wvKytzugYtaR/l5eXe1NTk7sXX30xQ\nn3MLWOshsjrUwiJ3fy/1vAP4NTAWeN/MBgOknneE+lUjUiCCIGDw4MG0tLSwadOmUKPy0aNH09ra\nyoEDB3SiUwpO2jl0MzsJiLn7x6nXk4H/BTwLzAAWpp6XZbNQkUzp3uUw7OZYNTU1XHTRRbofpxS8\nMCdFTwd+3TVNTjnwK3f/nZm9DCw1s5nAVuDa7JUp0jfJZJKvf/3r7Ny5M33jlLq6OmbNmsUdd9yR\nxcpEMidtoLv7m8D5x/j8Q2BSNooSyZTeLAAaPny4luNLUdJKUYmcZDLJrFmzeO2110LdOAK6djm8\n7LLLWL58eZarE8ke7bYokRGPx6moqKCxsZH169enDfOKiorPTnIeOnRIYS5FTyN0KWpBEHDTTTeF\nXooPXbsc3nzzzZobl8hRoEtR6p5WWb9+faj2Zsb555+v+3FKpCnQpagEQcCtt97Ktm3bQh8zduxY\n1qxZk8WqRAqD5tCl4CUSCfr160dZWRktLS1pw9zMGDRoEK2trbi7wlxKhkboUrCCIGD27Nns3bs3\nVPuqqiruv/9+bY4lJUuBLgWle25848aNoVdyjh8/Xqs4RVCgS4Ho6UnO2tpaxowZoyAXOYICXfKm\nN5ccnnzyyfzkJz/RtIrIMeikqOTckbschgnzWCzGwIEDaW1t5W9/+5vCXOQ4NEKXnEgmk8ybN48/\n/vGPobapLS8v59xzz9V14yI9oECXrAqCgHvvvZdNmzaFaj9gwACam5u1ilOkFxTokhXxeJzHHnuM\nw4cPh2pfWVnJAw88oOkUkT5QoEvGxONxnnjiCQ4ePBj6mIaGBubPn68gF8kABbr0WRAE/PCHP2T3\n7t2h2psZw4cPp62tTfPjIhmkQJde6T7JuXbt2lBXqtTU1DBw4ECampo0Py6SJQp06ZGGhga2bt3a\no2MmT56svcZFckDXoUtayWSSkSNHYmY9CvPJkyfj7gpzkRzRCF2OKx6P8+STT4beU6Vfv34MHTqU\nK664grvuuivL1YnI0TRCl7+TSCQ4+eSTMTOWLFkSKsy7V3Hu3r2bjRs3cuWVV+agUhE5mkboAvTu\nksOzzjqLBQsW6JJDkQIROtDNrAxYC7zn7l8zs2HA48CpwDrgW+4e7t/mUjASiQR33313j4L87LPP\nZuPGjVmsSkR6oydTLrOBI9dv3wHc4+7DgQ5gZiYLk+xJJpP8+Mc/ZsqUKdx5551pw7ysrIyGhobP\n7gCkMBcpTKFG6GY2FPhH4Hbg+2ZmwKXADakmbcCtwMNZqFEyIJlM8sgjj/CnP/2J1157DXdPe4zu\nACRSXMJOudwLzAX6pd6fCuxy9+6h3bvAGRmuTTIgCALmz5/PRx99FPqY6upqrr76ah599NEsViYi\nmZY20M3sa8AOd19nZhN6+gVm1gw0A9TX17Nq1aqe/hFFrbOzMy99fu6553jooYfYt29fqPaVlZV8\n4QtfoLm5mXPOOQeg13Xnq8/5Umr9BfW5YLn7CR/Aj+kagb8NbAf2AEuAnUB5qs3FwPJ0f9bIkSO9\n1KxcuTJn39Xa2uqjR4/22tpaB9I+mpqa/Ec/+pG3t7dntI5c9rkQlFp/3dXnXAPWepp8dff0I3R3\nnw/MB0iN0H/g7k1m9iRwDV1XuswAlmXiF4z0Ttgl+fX19QwbNoyZM2dqblwkYvpyHXoCeNzM/hl4\nFVicmZIkjEQiQRAEdHZ2hr7kUHuqiERbjwLd3VcBq1Kv3wTGZr4kOZEgCPje974Xem4cYPjw4Tzy\nyCPaqlYk4rT0vwgkk0muuuoqTjrpJFpaWkKFeU1NDXPnzsXd2bx5s8JcpARo6X8BSyaTTJ8+nS1b\ntoQ+prKykjlz5mjPcZESpEAvQOPGjWPt2rU9uh/nN77xDV03LlLiNOVSII7cc/yll14KFeannXYa\n7e3t7Nu3T2EuIgr0fIvH41RVVdHY2MjmzZvTto/FYgwaNIjW1lY++OADzY2LyGc05ZIHiUSC1tZW\nOjs7OXToUKhjampquPHGGzU3LiLHpUDPoZ7uq1JbW8vkyZOZO3euRuIikpYCPcs2bNjALbfcwiuv\nvEJnZ2eoYwYNGsS2bduyXJmIRI0CPUvi8ThPP/00e/fuDdU+FotRWVmpXQ5FpNcU6BmUSCT4xS9+\nwYcffhj6ksP6+nqWLVumKRUR6TMFegZMmTKF3//+96FDvLa2FjNj2rRpGo2LSMbossU+SCQSVFRU\nsGLFilBhXl9fT3t7O5988gmdnZ0KcxHJKAV6DwVBwODBg4nFYqHuxwlw0kkn0dTUxI4dOzS1IiJZ\noymXkJLJJDNmzAi1+Ae6bqw8adIk5s+fz4QJE7JbnIgIGqGfUDKZ5JJLLqG2tjb0Ss7uVZwHDx7U\n3uMiklMaoR9DEATcd999bNy4MVT76upqhg0bxpw5c3QXIBHJGwX6EZLJJLNmzWL9+vWh2ldUVHDT\nTTdpOb6IFISSD/QgCFiwYAEdHR2hrlSpqKigrq6Ob3/72wpyESkoJRnoyWSSO++8kz/84Q/s3r07\nbfuamhqqq6sV4iJS0Eoq0BOJBD/96U/p6OgI1f6CCy5g0aJFutRQRIpCyQR6Q0MDW7duDdV29OjR\nzJ49Wyc4RaSoRPayxSAIGDZsGFVVVcRisbRhbmYMHz6c9vZ2NmzYoDAXkaKTdoRuZtXAaqAq1f4p\nd7/FzIYBjwOnAuuAb7n7/mwWG0ZPRuIAAwYMoLm5WXPjIlL0wozQ9wGXuvv5wAXAV83sy8AdwD3u\nPhzoAGZmr8wTi8fj1NXVhRqJQ9eVKtOmTaO9vZ2Ojg6FuYhEQtoRurs70H1nhorUw4FLgRtSn7cB\ntwIPZ77E4wuCgB/84Ad8/PHHodp3L8fXCk4RiaJQc+hmVmZm64EdwPPAvwO73L17Z6p3gTOyU+Lf\nSyQSn20/29LSkjbMY7EY1dXVNDU1aTm+iESadQ3AQzY2GwD8GvgfwC9T0y2Y2ZnA/3H3c49xTDPQ\nDFBfX3/h0qVLe1Xoc889xwMPPMCBAwfC1sp1111HS0tLr74vUzo7O6mrq8trDblWan0utf6C+pxr\nEydOXOfuY9I2dPcePYD/CfwQ2AmUpz67GFie7tiRI0d6T7S2tnpDQ4ObmdM1zZP2MX78eG9vb+/R\n92TTypUr811CzpVan0utv+7qc64Baz1EPqedcjGz+tTIHDOrAS4HNgErgWtSzWYAy8L9rkkvCAJO\nPvlkWlpaePvtt7t/kRxXZWXlZyc5X3zxRS0EEpGSFGZh0WCgzczK6JpzX+ruvzGzjcDjZvbPwKvA\n4r4UkkwmmT59Olu2bOnRcWPHjmXNmjV9+WoRkUgIc5XLn4EvHePzN4GxfS2ge1+VZcuWpR2Jdysv\nL+e6667TLdxERI6Ql6X/yWSSyZMn09nZmb7xETQaFxE5vpwu/f/000+55JJLaGxsDB3m5eXlNDU1\n4e4KcxGRE8jpCP2dd97hnXfeOWEbMyMWi/HFL35ROx2KiPRAwey2OGLECNra2hTgIiK9VBCB3tTU\npBOcIiJ9lLftcxsaGmhtbcXdFeYiIhmQ0xF6//79uf7665k+fbqmVkREMiyngX766afz8MM53ZBR\nRKRkRPaORSIipUaBLiISEQp0EZGIUKCLiESEAl1EJCIU6CIiEaFAFxGJCAW6iEhEKNBFRCJCgS4i\nEhEKdBGRiFCgi4hEhAJdRCQiFOgiIhGhQBcRiYi0gW5mZ5rZSjPbaGYbzGx26vOBZva8mW1OPZ+S\n/XJFROR4wozQDwI3u/to4MvAd81sNDAPeMHdRwAvpN6LiEiepA10d9/m7q+kXn8MbALOAKYCbalm\nbcC0bBUpIiLpmbuHb2zWAKwGzgX+6u4DUp8b0NH9/qhjmoFmgPr6+guXLl3a96qLSGdnJ3V1dfku\nI6dKrc+l1l9Qn3Nt4sSJ69x9TLp2oQPdzOqAF4Hb3f0ZM9t1ZICbWYe7n3AefdSoUf7666+H+r6o\nWLVqFRMmTMh3GTlVan0utf6C+pxrZhYq0ENd5WJmFcDTwBJ3fyb18ftmNjj188HAjt4WKyIifRfm\nKhcDFgOb3P3uI370LDAj9XoGsCzz5YmISFjlIdp8BfgW8K9mtj712QJgIbDUzGYCW4Frs1OiiIiE\nkTbQ3f3/AnacH0/KbDkiItJbWikqIhIRCnQRkYhQoIuIRIQCXUQkIhToIiIRoUAXEYkIBbqISEQo\n0EVEIkKBLiISEQp0EZGIUKCLiESEAl1EJCIU6CIiEaFAFxGJCAW6iEhEKNBFRCJCgS4iEhEKdBGR\niFCgi4hEhAJdRCQiFOgiIhGhQBcRiYi0gW5mPzezHWb2lyM+G2hmz5vZ5tTzKdktU0RE0gkzQv8l\n8NWjPpsHvODuI4AXUu9FRCSP0ga6u68GPjrq46lAW+p1GzAtw3WJiEgPmbunb2TWAPzG3c9Nvd/l\n7gNSrw3o6H5/jGObgWaA+vr6C5cuXZqZyotEZ2cndXV1+S4jp0qtz6XWX1Cfc23ixInr3H1Munbl\nff0id3czO+5vBXcPgABg1KhRPmHChL5+ZVFZtWoV6nO0lVp/QX0uVL29yuV9MxsMkHrekbmSRESk\nN3ob6M8CM1KvZwDLMlOOiIj0VpjLFh8DksAoM3vXzGYCC4HLzWwzcFnqvYiI5FHaOXR3v/44P5qU\n4VpERKQPtFJURCQiFOgiIhGhQBcRiQgFuohIRCjQRUQiQoEuIhIRCnQRkYhQoIuIRIQCXUQkIhTo\nIiIRoUAXEYkIBbqISEQo0EVEIkKBLiISEQp0EZGIUKCLiESEAl1EJCIU6CIiEaFAFxGJCAW6iEhE\nKNBFRCJCgS4iEhF9CnQz+6qZvW5mW8xsXqaKEhGRnut1oJtZGfAQcAUwGrjezEZnqjAREemZvozQ\nxwJb3P1Nd98PPA5MzUxZIiLSU+V9OPYM4J0j3r8LjDu6kZk1A82pt/vM7C99+M5idBqwM99F5Fip\n9bnU+gvqc66dFaZRXwI9FHcPgADAzNa6+5hsf2chUZ+jr9T6C+pzoerLlMt7wJlHvB+a+kxERPKg\nL4H+MjDCzIaZWSXwTeDZzJQlIiI91espF3c/aGb/HVgOlAE/d/cNaQ4Levt9RUx9jr5S6y+ozwXJ\n3D3fNYiISAZopaiISEQo0EVEIiIngV5qWwSY2ZlmttLMNprZBjObne+acsXMyszsVTP7Tb5ryQUz\nG2BmT5nZv5nZJjO7ON81ZZuZ3ZT67/ovZvaYmVXnu6ZMM7Ofm9mOI9fNmNlAM3vezDannk/JZ43H\nkvVAL9EtAg4CN7v7aODLwHdLoM/dZgOb8l1EDt0H/M7dvwCcT8T7bmZnAN8Dxrj7uXRdEPHN/FaV\nFb8EvnrUZ/OAF9x9BPBC6n1BycUIveS2CHD3be7+Sur1x3T9T35GfqvKPjMbCvwj8LN815ILZtYf\nGA8sBnD3/e6+K79V5UQ5UGNm5UAt8B95rifj3H018NFRH08F2lKv24BpOS0qhFwE+rG2CIh8uHUz\nswbgS8Ca/FaSE/cCc4HD+S4kR4YBHwC/SE0z/czMTsp3Udnk7u8B/wL8FdgG/M3dV+S3qpw53d23\npV5vB07PZzHHopOiWWRmdcDTwBx3353verLJzL4G7HD3dfmuJYfKgf8CPOzuXwI+oQD/GZ5JqXnj\nqXT9MhsCnGRm8fxWlXvedb13wV3znYtAL8ktAsysgq4wX+Luz+S7nhz4CvB1M3ubrmm1S83s0fyW\nlHXvAu+6e/e/vp6iK+Cj7DLgLXf/wN0PAM8AjXmuKVfeN7PBAKnnHXmu5/+Ti0AvuS0CzMzomlfd\n5O5357ueXHD3+e4+1N0b6Po7/oO7R3rk5u7bgXfMbFTqo0nAxjyWlAt/Bb5sZrWp/84nEfETwUd4\nFpiRej0DWJbHWo4pF7st9maLgGL3FeBbwL+a2frUZwvc/bd5rEmy40ZgSWqw8ibwT3muJ6vcfY2Z\nPQW8QtfVXK9SBEvie8rMHgMmAKeZ2bvALcBCYKmZzQS2Atfmr8Jj09J/EZGI0ElREZGIUKCLiESE\nAl1EJCIU6CIiEaFAFxGJCAW6iEhEKNBFRCLi/wHmAqYzP3OKTQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fc377cd64e0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "import random\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "DATASET_FILE = \"LinearRegressionDataSet_2.txt\"\n",
    "dataset = np.loadtxt(DATASET_FILE, dtype=np.float64)\n",
    "\n",
    "def Read_txt():\n",
    "    dataset = np.loadtxt(DATASET_FILE, dtype=np.float64)\n",
    "    X = np.zeros((999, 1))\n",
    "    Y = np.zeros((999, 1))\n",
    "    for i in range(999):\n",
    "        X[i] = dataset[i][1]\n",
    "        Y[i] = dataset[i][2]\n",
    "    return X,Y\n",
    "\n",
    "def runplt():\n",
    "    plt.figure()\n",
    "    plt.axis([0, 11, 0, 70])\n",
    "    plt.grid(True)\n",
    "    return plt\n",
    "\n",
    "if __name__==\"__main__\":\n",
    "    plt=runplt()\n",
    "    X,Y=Read_txt()\n",
    "    plt.plot(X, Y,'k.')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "#### 二、判斷類型並計算"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "由圖可知，此樣本集初步判斷爲一元線性回歸，則套用方程 $$Y=\\Theta _{0}+ \\Theta _{1}X $$ ,並初步計算殘差平方和"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAADuCAYAAAAOR30qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADs9JREFUeJzt3U+IJGcdh/Gns9lNdlxxh2wOZqJZQYTMiAchghcvswhe\nve4uCApxBuJJURE8CLl4EFRMQFQMmUYPgngSMRE8eTBBg2aNomCiWQIJbhTdTfbPtIeq6qmpqqmp\nqu7qeqvq+YBke7qHlOB++frr989kNpshSereXV0/gCQpYiBLUiAMZEkKhIEsSYEwkCUpEAayJAXC\nQJakQBjIkhQIA1mSAnF3nQ+fO3dudv78+ZYeRZKG6fnnn39jNpvdf9znagXy+fPnee6555o/lSSN\n0GQyebnK5xxZSFIgDGRJCoSBLEmBMJAlKRAGsiQFwkCWpEIXgEnmP1ut/hsNZEnK2QKeLfj5FdoM\n5VrrkCVp2DaAqwcvJ6m35rfdXWnt325DliQgSt84jF/icBiviIEsaeSSWXFsAjyc+ciK7oI2kCWN\n2AbzWfHXyLfifQrCeLO1p3GGLGmEtjg0Cy4aTxS24tPAi608EdiQJY3KlCh94zB+iHwYzzgijLeB\n6+09GjZkSaNxgUNL2Sq34k3abMVpNmRJA7dLlL5xGCd7PNIKW/HZ+IerCWOwIUsatFPArYOX2SD+\nIPCHot/bBp5p66GOZEOWNEDJrDgO46NacWEY79BFGIMNWdLgrAE3oj/eIirJaY8B3yr6vW5acZoN\nWdJAJK04DuMJ+TCecUQYd9eK0wxkST03Bc4Al6KXfyE/nvgxJUvZZsATrT1dHY4sJPVY0w0e3Y8n\nitiQJfXUOvMwfpJ8GP+TXoUx2JAl9U7TDR4ngKeAi2081FIYyJJ6YgpcZp62DwKvZj5yh4L/33+a\ntrc8L4sjC0k9sEv0pV0cxhPyYTyjINF26EsYgw1ZUtCmzFdPQI3xxAR4mpDHE0UMZEmBKrlOKdHx\nYUDL5shCUmCSGzziMK58GNAmqz4MaNkMZEkBWafZCood+hzECUcWkgKxDrwZ/XHgs+Kj2JAldSw5\nr/jN6DCgbBhvULLBY5+hhDHYkCV1KvXFXeVWXPpGr9mQJXVgg/kXd78nH8Zf55jDgIbJhixphXaJ\nDp6I1ZoVf5ZQTmVriw1Z0gokZxXHYfxl8mF8hSPCeI9oVjzsMAYbsqTWNT0MCKIwHs6XdscxkCW1\n6JilbLeJDmHLCfeIzDY5spDUgi3mS9ng6FacC+PkS7vxhTHYkCUt3SkO3facVTieOAncbO2J+sKG\nLGmJtqgfxpsYxhEDWdISTIHzwJUahwFB9KVd/8+gWBZHFpIW0HRd8Q5jWMZWl4EsqYEp8Gng7eil\ns+KlMJAl1bTF/Lbn20QZm1UYxg+Qv3dJac6QJVWU7LaLw3hCPowLZ8UniGbFhvFxDGRJFWwwv9vu\nj+RHFF+gIIj34h/eZky77RbhyEJSiaaXjG5jCNdnQ5Z0hC3mYfwl8mH8O0pWUIxzp92ibMiSMqbA\nZeZpW6sVG8SLsCFLik2Be4ha8ax4g8dNbMUtsiFLovkGj7PAtVaeaIwMZGn0mhwGBO62Wz4DWRqt\npgfHb+L5E+1whiyNzi5R+sZhXOswoB0M4/bYkKVR2QCuHry0FQfFhiyNQtKK4zD2iMwg2ZClwUu1\n4jsU/633MKAg2JClQUuF8YR8GJe2YsN41QxkaZCmRH+9rxYfBrRLSSue4TkU3XBkIQ1Ok3XFdwGP\n4rribtmQpUGYAmeIEvgWfJ58GP+GkvHEHQzj7tmQpd5rusHD65RCY0OWeu0UpRs83qLkZDbDODQG\nstRLybriY2bF92R/mHxp58lsITKQpV5J7rWLT2artcFjG5eyhc0ZstQbqdueoebJbEe+oYDYkKVe\nWOPQbc+1WrFh3Bc2ZCloTVux2577yIYsBSmZFTdpxTsYxv1kQ5aCk9pptw+cKPiIl4wOkg1ZCkbB\nUrZsGJceBmQY952BLAVhnflStpfIjyc+yTFf2nkY0BA4spA61+QwIL+0GyIbstSJKfBO5iOKr5IP\n41/hWcUjY0OWVs6lbCpmQ5ZW6hSlS9muYyseMQNZWomKhwGdzv7wNH5pNx6OLKRWLXL+xCbe9jwu\nBrLUmjXgxsHLymF8mmh2obFxZCEtXTKeiMO49mFAhvFY2ZClpdoArh68rNyKHU/IhiwtyZTor1Mc\nxrVa8R6GscCGLC1B6pLRGcU158hT2bzpWQdsyFJjU6Kbm1OXjGb/RhW24p34h4axDjOQpUa2gEvA\nbfgr+fHEJyj50s4gVjFHFlItU+Ay87Stta7YEYXK2ZClyjaIWvEMHicfxr+gIIzPcjC3MIxVzoYs\nHWuX+VnFUKMVe4OH6jGQpVKpFRRFQfxf4B3ZH7qmWM04spAKJbvtSsJ4RkEYP4BhrKZsyFJO6kCg\nWl/aOaLQYmzI0twFogSuG8bJumLDWIuxIUtMiVZPxDyVTR2xIWvkdmkWxp7KpuWzIWukmm7wcAWF\n2mMga4RSR2TWOgzIFRRqlyMLjUiylC11RGalw4Ag+uLOS0bVLhuyRiLViv8OvC/z9seAXxf9nkvZ\ntDoGsgau6QqK0jekVjiy0IAlhwEB3yAfxj+jZDmbYazVsyFrgFI77cAjMtUbNmQNyJRDO+3uJR/G\n/6YgjCdE99p5RKa6ZUPWQDRtxa4rVjhsyOq5TCuuddvzDoaxQmJDVo+dAm4dvKzcik8CN1t5ImkR\nNmT1UNKK4zCu1Yr3MIwVKhuyeiZ1gwd4nZIGxYasnpgS/c81dYNHpVacrKAwjBU+G7J6INWKax0G\nZCtWv9iQFbDkBo9UK650GJCtWP1kICtQp5gH8avkxxOPUPKl3T5wsb1Hk1riyEIBWuPQCoosN3ho\noGzICkgyorgB3yEfxj+hZDxhGKv/bMgKwC7w5MFLW7FGyoasDk2Be5iH8X3kw/hf2Io1GjZkdWQd\nePPgZeVWfBa41soTSV2zIWvFkm3PcRjXPgzIMNZw2ZC1Qk23PTsr1jjYkLUCBRs8PCJTyrEhq2Vr\nwI2Dl5VbsVcpaXxsyGpJchhQHMaVW/EZohUUhrHGx4asFjRdQeFhQBo3G7KWaJfFVlAYxho3G7KW\nJDUrfg14d+bthzl0B+kBW7GUsCFrQcm64tSsOBvGMwrC2CMypSwDWQu4AFyK/vh98uOJpynZ9uwR\nmVKWIws10PQwIJeySWUMZNW0AVyN/vh+4G+Zt18HzhX93h42YqmcgayKmi5lOw1cb+WJpKFxhqxj\nLHIY0DaGsVSdDVkltji0PMJWLLXKhqwCyQaPOIwrt+K7iWbFhrHUhA1ZGU2PyHSDh7QoA1mxpuOJ\nCdGCY1dQSIsykEXzFRQPAK+28kTSGDlDHrVFDwMyjKVlsiGPVmqDxxvA/Zm3PwD8uej3vE5JaouB\nPEpbzMPY8YQUDEcWozIl2td8BX5EPoyfomQFhWEstc2GPBqp5WyVWzF4BoW0OjbkwZsS7Zx7Fj5M\nPoxfo6QVzzCMpdWxIQ9W0yMyzwLXWnkiSeVsyIOTHAYUh3HlpWwniMYThrHUFRvyoDTZ9nwf8E0c\nTUjdM5AHoel4wvMnpJAYyL2Xuu0ZDGOpx5wh99o6h257rjQr9rZnKVQGci9dIHcGRVZhK97E256l\ncDmy6BVv8JCGzIbcG2vMw/ga+TB+DyVL2QxjqQ9syL1wgmjUgBs8pAGzIQdtiyiB9+Gn5MP4u5Ss\noDCMpb6xIQdpClw6eOkRmdIo2JCDkhwEFIfxR8mH8VVKVlAYxlKf2ZCD0bQVe4OHNBQGcuemwGXm\naVs5iO8C7rT1UJI6YCB3qslhQAAngZutPJGk7jhD7sw6h27wqHXbs2EsDZENeeWatmJXUEhDZ0Ne\nmeTg+CateA/DWBo+G/JKND2v2FYsjYmB3Kop8Cjwv+ilKygklTCQW7NBtIsD+A/wrszbR5ZfW7E0\nVs6Qly6ZFcdhPCEfxjMKMjc5ON4wlsbKQF6qdea77X5OfkTxJCVf2nlwvDR2jiyWwg0ekhZnQ15I\nZinbJfJh/Apu8JBUiQ25sabXKd0LfA/HE5KybMi1Ja04DuOiDR77lLTiGxjGkorYkGtpOiv2OiVJ\nx7MhV7LItmevU5JUjYF8rF2aHRy/Hb/xTBsPJWmAHFmUSn1xVzmIIVpX7JxYUj025EK75L64yzry\nOqUZhrGkJmzIh0yBzwBvRS89lU3SCtmQ55JZ8VvRyrRsGD+CYSypVTZk4NBytlqz4m380k7Ssoy8\nIW8wX872W/Jh/BQlGzxcQSFpuUbakKc0W8rmeEJSe0bYkLeYh/EXyYfxVUpasWEsqT0jashN77Xb\nAZ5o5YkkKW0kgZy6TqkoiPeLfn4K+AGuKZa0KgMfWSQbPErCeFb0823gbQxjSas04Ia8DrwZ/dFt\nz5J6YIANOWnFdcM4OQzIMJbUjQEF8pTov078xV3lIzKT255dUyypWwMZWTS9TskVFJLC0fNAbhrE\n3uAhKTw9HlmsMQ/jt8mH8UfwBg9JvdLDhtz0XrvTwPVWnkiSlqFnDXmNeRi/QD6M9yhpxYaxpLD1\npCE3bcUngZutPJEkLVsPGvIG8zD+Nvkw/gclJ7MZxpL6I+BALtj2/LnMR2bAg9nfS84q9mQ2Sf0S\n4Mgicyrbx4FfZj5SeBiQZxVL6rfAArnprNgwltR/gYwskvFE6l67StueIVpBYRhL6r8AGnLT3Xal\nb0hS73TYkKfAGeZhXLsVG8aShqWjhnxMK36QaDlbjrvtJA3XihtyMis+phUXhvEehrGkIVthQ07d\na7cPnMi8/QTREuKcTeDFFp9LksKwgoY8Jcr91AaPbBjPOCKM9zCMJY1Fy4E8BT4F3IFb5McTL3DE\nd3ObeJ2SpLFpOZC/AtyGPwGnMm/NgA9lP58c12YrljQ+LQfyK9E/0lufb1FynZKNWNJ4tRzI743+\n8RjRF3kzjvgacQ/vtpM0di0H8uPA3cXL2+a3PTsrliRoPZAvAj8k2pGXmBCNJ/YxiCXpwArWIV/E\n4JWk4wVy2pskyUCWpEAYyJIUCANZkgJhIEtSICazWfWD3ieTyevAy+09jiQN0kOz2ez+4z5UK5Al\nSe1xZCFJgTCQJSkQBrIkBcJAlqRAGMiSFAgDWZICYSBLUiAMZEkKhIEsSYH4PyEcMGl6w7zDAAAA\nAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fc376ad8f28>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "殘差平方和: 0.000000\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "import random\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "DATASET_FILE = \"LinearRegressionDataSet_2.txt\"\n",
    "dataset = np.loadtxt(DATASET_FILE, dtype=np.float64)\n",
    "\n",
    "def Read_txt():\n",
    "    dataset = np.loadtxt(DATASET_FILE, dtype=np.float64)\n",
    "    X = np.zeros((999, 1))\n",
    "    Y = np.zeros((999, 1))\n",
    "    for i in range(999):\n",
    "        X[i] = dataset[i][1]\n",
    "        Y[i] = dataset[i][2]\n",
    "    return X,Y\n",
    "\n",
    "def show_line(X,Y):\n",
    "    regr = LinearRegression()\n",
    "    regr.fit(X, Y)\n",
    "    plt.scatter(X, Y, color='yellow')\n",
    "    plt.plot(X, regr.predict(X), color='red')\n",
    "    plt.xticks(())\n",
    "    plt.yticks(())\n",
    "    plt.show()\n",
    "    \n",
    "if __name__==\"__main__\":\n",
    "    X,Y=Read_txt()\n",
    "    show_line(X,Y)\n",
    "    model = LinearRegression()\n",
    "    model.fit(X, Y)\n",
    "    print('殘差平方和: %f' % np.mean((model.predict(X) - Y) ** 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "由圖和差值分析可得，可以使用一元回歸方程，由於使用scikit-learn中的LinearRegression估計器判斷殘方差和爲0,故使此計算 $$ \\Theta _{0} 和  \\Theta _{1}  $$ 並繪制圖形"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run time 0:00:00.015086\n",
      "Intercept value (Θ0) [ 26.99999998]\n",
      "coefficient (Θ1) [[ 4.00000001]]\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAADuCAYAAAAOR30qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADs9JREFUeJzt3U+IJGcdh/Gns9lNdlxxh2wOZqJZQYTMiAchghcvswhe\nve4uCApxBuJJURE8CLl4EFRMQFQMmUYPgngSMRE8eTBBg2aNomCiWQIJbhTdTfbPtIeq6qmpqqmp\nqu7qeqvq+YBke7qHlOB++frr989kNpshSereXV0/gCQpYiBLUiAMZEkKhIEsSYEwkCUpEAayJAXC\nQJakQBjIkhQIA1mSAnF3nQ+fO3dudv78+ZYeRZKG6fnnn39jNpvdf9znagXy+fPnee6555o/lSSN\n0GQyebnK5xxZSFIgDGRJCoSBLEmBMJAlKRAGsiQFwkCWpEIXgEnmP1ut/hsNZEnK2QKeLfj5FdoM\n5VrrkCVp2DaAqwcvJ6m35rfdXWnt325DliQgSt84jF/icBiviIEsaeSSWXFsAjyc+ciK7oI2kCWN\n2AbzWfHXyLfifQrCeLO1p3GGLGmEtjg0Cy4aTxS24tPAi608EdiQJY3KlCh94zB+iHwYzzgijLeB\n6+09GjZkSaNxgUNL2Sq34k3abMVpNmRJA7dLlL5xGCd7PNIKW/HZ+IerCWOwIUsatFPArYOX2SD+\nIPCHot/bBp5p66GOZEOWNEDJrDgO46NacWEY79BFGIMNWdLgrAE3oj/eIirJaY8B3yr6vW5acZoN\nWdJAJK04DuMJ+TCecUQYd9eK0wxkST03Bc4Al6KXfyE/nvgxJUvZZsATrT1dHY4sJPVY0w0e3Y8n\nitiQJfXUOvMwfpJ8GP+TXoUx2JAl9U7TDR4ngKeAi2081FIYyJJ6YgpcZp62DwKvZj5yh4L/33+a\ntrc8L4sjC0k9sEv0pV0cxhPyYTyjINF26EsYgw1ZUtCmzFdPQI3xxAR4mpDHE0UMZEmBKrlOKdHx\nYUDL5shCUmCSGzziMK58GNAmqz4MaNkMZEkBWafZCood+hzECUcWkgKxDrwZ/XHgs+Kj2JAldSw5\nr/jN6DCgbBhvULLBY5+hhDHYkCV1KvXFXeVWXPpGr9mQJXVgg/kXd78nH8Zf55jDgIbJhixphXaJ\nDp6I1ZoVf5ZQTmVriw1Z0gokZxXHYfxl8mF8hSPCeI9oVjzsMAYbsqTWNT0MCKIwHs6XdscxkCW1\n6JilbLeJDmHLCfeIzDY5spDUgi3mS9ng6FacC+PkS7vxhTHYkCUt3SkO3facVTieOAncbO2J+sKG\nLGmJtqgfxpsYxhEDWdISTIHzwJUahwFB9KVd/8+gWBZHFpIW0HRd8Q5jWMZWl4EsqYEp8Gng7eil\ns+KlMJAl1bTF/Lbn20QZm1UYxg+Qv3dJac6QJVWU7LaLw3hCPowLZ8UniGbFhvFxDGRJFWwwv9vu\nj+RHFF+gIIj34h/eZky77RbhyEJSiaaXjG5jCNdnQ5Z0hC3mYfwl8mH8O0pWUIxzp92ibMiSMqbA\nZeZpW6sVG8SLsCFLik2Be4ha8ax4g8dNbMUtsiFLovkGj7PAtVaeaIwMZGn0mhwGBO62Wz4DWRqt\npgfHb+L5E+1whiyNzi5R+sZhXOswoB0M4/bYkKVR2QCuHry0FQfFhiyNQtKK4zD2iMwg2ZClwUu1\n4jsU/633MKAg2JClQUuF8YR8GJe2YsN41QxkaZCmRH+9rxYfBrRLSSue4TkU3XBkIQ1Ok3XFdwGP\n4rribtmQpUGYAmeIEvgWfJ58GP+GkvHEHQzj7tmQpd5rusHD65RCY0OWeu0UpRs83qLkZDbDODQG\nstRLybriY2bF92R/mHxp58lsITKQpV5J7rWLT2artcFjG5eyhc0ZstQbqdueoebJbEe+oYDYkKVe\nWOPQbc+1WrFh3Bc2ZCloTVux2577yIYsBSmZFTdpxTsYxv1kQ5aCk9pptw+cKPiIl4wOkg1ZCkbB\nUrZsGJceBmQY952BLAVhnflStpfIjyc+yTFf2nkY0BA4spA61+QwIL+0GyIbstSJKfBO5iOKr5IP\n41/hWcUjY0OWVs6lbCpmQ5ZW6hSlS9muYyseMQNZWomKhwGdzv7wNH5pNx6OLKRWLXL+xCbe9jwu\nBrLUmjXgxsHLymF8mmh2obFxZCEtXTKeiMO49mFAhvFY2ZClpdoArh68rNyKHU/IhiwtyZTor1Mc\nxrVa8R6GscCGLC1B6pLRGcU158hT2bzpWQdsyFJjU6Kbm1OXjGb/RhW24p34h4axDjOQpUa2gEvA\nbfgr+fHEJyj50s4gVjFHFlItU+Ay87Stta7YEYXK2ZClyjaIWvEMHicfxr+gIIzPcjC3MIxVzoYs\nHWuX+VnFUKMVe4OH6jGQpVKpFRRFQfxf4B3ZH7qmWM04spAKJbvtSsJ4RkEYP4BhrKZsyFJO6kCg\nWl/aOaLQYmzI0twFogSuG8bJumLDWIuxIUtMiVZPxDyVTR2xIWvkdmkWxp7KpuWzIWukmm7wcAWF\n2mMga4RSR2TWOgzIFRRqlyMLjUiylC11RGalw4Ag+uLOS0bVLhuyRiLViv8OvC/z9seAXxf9nkvZ\ntDoGsgau6QqK0jekVjiy0IAlhwEB3yAfxj+jZDmbYazVsyFrgFI77cAjMtUbNmQNyJRDO+3uJR/G\n/6YgjCdE99p5RKa6ZUPWQDRtxa4rVjhsyOq5TCuuddvzDoaxQmJDVo+dAm4dvKzcik8CN1t5ImkR\nNmT1UNKK4zCu1Yr3MIwVKhuyeiZ1gwd4nZIGxYasnpgS/c81dYNHpVacrKAwjBU+G7J6INWKax0G\nZCtWv9iQFbDkBo9UK650GJCtWP1kICtQp5gH8avkxxOPUPKl3T5wsb1Hk1riyEIBWuPQCoosN3ho\noGzICkgyorgB3yEfxj+hZDxhGKv/bMgKwC7w5MFLW7FGyoasDk2Be5iH8X3kw/hf2Io1GjZkdWQd\nePPgZeVWfBa41soTSV2zIWvFkm3PcRjXPgzIMNZw2ZC1Qk23PTsr1jjYkLUCBRs8PCJTyrEhq2Vr\nwI2Dl5VbsVcpaXxsyGpJchhQHMaVW/EZohUUhrHGx4asFjRdQeFhQBo3G7KWaJfFVlAYxho3G7KW\nJDUrfg14d+bthzl0B+kBW7GUsCFrQcm64tSsOBvGMwrC2CMypSwDWQu4AFyK/vh98uOJpynZ9uwR\nmVKWIws10PQwIJeySWUMZNW0AVyN/vh+4G+Zt18HzhX93h42YqmcgayKmi5lOw1cb+WJpKFxhqxj\nLHIY0DaGsVSdDVkltji0PMJWLLXKhqwCyQaPOIwrt+K7iWbFhrHUhA1ZGU2PyHSDh7QoA1mxpuOJ\nCdGCY1dQSIsykEXzFRQPAK+28kTSGDlDHrVFDwMyjKVlsiGPVmqDxxvA/Zm3PwD8uej3vE5JaouB\nPEpbzMPY8YQUDEcWozIl2td8BX5EPoyfomQFhWEstc2GPBqp5WyVWzF4BoW0OjbkwZsS7Zx7Fj5M\nPoxfo6QVzzCMpdWxIQ9W0yMyzwLXWnkiSeVsyIOTHAYUh3HlpWwniMYThrHUFRvyoDTZ9nwf8E0c\nTUjdM5AHoel4wvMnpJAYyL2Xuu0ZDGOpx5wh99o6h257rjQr9rZnKVQGci9dIHcGRVZhK97E256l\ncDmy6BVv8JCGzIbcG2vMw/ga+TB+DyVL2QxjqQ9syL1wgmjUgBs8pAGzIQdtiyiB9+Gn5MP4u5Ss\noDCMpb6xIQdpClw6eOkRmdIo2JCDkhwEFIfxR8mH8VVKVlAYxlKf2ZCD0bQVe4OHNBQGcuemwGXm\naVs5iO8C7rT1UJI6YCB3qslhQAAngZutPJGk7jhD7sw6h27wqHXbs2EsDZENeeWatmJXUEhDZ0Ne\nmeTg+CateA/DWBo+G/JKND2v2FYsjYmB3Kop8Cjwv+ilKygklTCQW7NBtIsD+A/wrszbR5ZfW7E0\nVs6Qly6ZFcdhPCEfxjMKMjc5ON4wlsbKQF6qdea77X5OfkTxJCVf2nlwvDR2jiyWwg0ekhZnQ15I\nZinbJfJh/Apu8JBUiQ25sabXKd0LfA/HE5KybMi1Ja04DuOiDR77lLTiGxjGkorYkGtpOiv2OiVJ\nx7MhV7LItmevU5JUjYF8rF2aHRy/Hb/xTBsPJWmAHFmUSn1xVzmIIVpX7JxYUj025EK75L64yzry\nOqUZhrGkJmzIh0yBzwBvRS89lU3SCtmQ55JZ8VvRyrRsGD+CYSypVTZk4NBytlqz4m380k7Ssoy8\nIW8wX872W/Jh/BQlGzxcQSFpuUbakKc0W8rmeEJSe0bYkLeYh/EXyYfxVUpasWEsqT0jashN77Xb\nAZ5o5YkkKW0kgZy6TqkoiPeLfn4K+AGuKZa0KgMfWSQbPErCeFb0823gbQxjSas04Ia8DrwZ/dFt\nz5J6YIANOWnFdcM4OQzIMJbUjQEF8pTov078xV3lIzKT255dUyypWwMZWTS9TskVFJLC0fNAbhrE\n3uAhKTw9HlmsMQ/jt8mH8UfwBg9JvdLDhtz0XrvTwPVWnkiSlqFnDXmNeRi/QD6M9yhpxYaxpLD1\npCE3bcUngZutPJEkLVsPGvIG8zD+Nvkw/gclJ7MZxpL6I+BALtj2/LnMR2bAg9nfS84q9mQ2Sf0S\n4Mgicyrbx4FfZj5SeBiQZxVL6rfAArnprNgwltR/gYwskvFE6l67StueIVpBYRhL6r8AGnLT3Xal\nb0hS73TYkKfAGeZhXLsVG8aShqWjhnxMK36QaDlbjrvtJA3XihtyMis+phUXhvEehrGkIVthQ07d\na7cPnMi8/QTREuKcTeDFFp9LksKwgoY8Jcr91AaPbBjPOCKM9zCMJY1Fy4E8BT4F3IFb5McTL3DE\nd3ObeJ2SpLFpOZC/AtyGPwGnMm/NgA9lP58c12YrljQ+LQfyK9E/0lufb1FynZKNWNJ4tRzI743+\n8RjRF3kzjvgacQ/vtpM0di0H8uPA3cXL2+a3PTsrliRoPZAvAj8k2pGXmBCNJ/YxiCXpwArWIV/E\n4JWk4wVy2pskyUCWpEAYyJIUCANZkgJhIEtSICazWfWD3ieTyevAy+09jiQN0kOz2ez+4z5UK5Al\nSe1xZCFJgTCQJSkQBrIkBcJAlqRAGMiSFAgDWZICYSBLUiAMZEkKhIEsSYH4PyEcMGl6w7zDAAAA\nAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fc36884b358>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.linear_model import LinearRegression\n",
    "import random\n",
    "import datetime\n",
    "\n",
    "DATASET_FILE = \"LinearRegressionDataSet_2.txt\"\n",
    "def Read_txt():\n",
    "    dataset = np.loadtxt(DATASET_FILE, dtype=np.float64)\n",
    "    X = np.zeros((999, 1))\n",
    "    Y = np.zeros((999, 1))\n",
    "    for i in range(0, 999):\n",
    "        X[i] = dataset[i][1]\n",
    "        Y[i] = dataset[i][2]\n",
    "    return X,Y\n",
    "\n",
    "def linear_model_main(X_parameters, Y_parameters, predict_value=0):\n",
    "    # Create linear regression object\n",
    "    regr = LinearRegression()\n",
    "    regr.fit(X_parameters, Y_parameters)\n",
    "    predict_outcome = regr.predict(predict_value)\n",
    "    predictions = {}\n",
    "    predictions['intercept'] = regr.intercept_\n",
    "    predictions['coefficient'] = regr.coef_\n",
    "    predictions['predicted_value'] = predict_outcome\n",
    "    return predictions\n",
    "\n",
    "def show_line(X,Y):\n",
    "    regr = LinearRegression()\n",
    "    regr.fit(X, Y)\n",
    "    plt.scatter(X, Y, color='yellow')\n",
    "    plt.plot(X, regr.predict(X), color='red')\n",
    "    plt.xticks(())\n",
    "    plt.yticks(())\n",
    "    plt.show()\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    b=datetime.datetime.now()\n",
    "    X,Y=Read_txt()\n",
    "    result = linear_model_main(X, Y)\n",
    "    e=datetime.datetime.now()\n",
    "    print(\"Run time\",(e-b))\n",
    "    print(\"Intercept value (Θ0)\", result['intercept'])\n",
    "    print(\"coefficient (Θ1)\", result['coefficient'])\n",
    "#     print(\"Predicted value: \", result['predicted_value'])\n",
    "    show_line(X,Y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "#### 三、 牛頓法+梯度下降（牛頓發由於無法正確模擬，故採用梯度下降輔助計算）（未知不正確）（存在未知錯誤）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "usage: __main__.py [-h] [--batch BATCH] [--epoch EPOCH] [--rate RATE]\n",
      "__main__.py: error: unrecognized arguments: -f /run/user/1000/jupyter/kernel-2da79057-b38f-485f-ad05-1ac8ecebeb6b.json\n"
     ]
    },
    {
     "ename": "SystemExit",
     "evalue": "2",
     "output_type": "error",
     "traceback": [
      "An exception has occurred, use %tb to see the full traceback.\n",
      "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/cgh/.linuxbrew/opt/python3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2889: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
      "  warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
     ]
    }
   ],
   "source": [
    "#!/usr/bin/env python3\n",
    "\n",
    "import random\n",
    "import argparse\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "# dataset file\n",
    "DATASET_FILE = \"LinearRegressionDataSet_2.txt\"\n",
    "\n",
    "# default learning rate\n",
    "ALPHA = 0.06\n",
    "# default min batch size\n",
    "MIN_BATCH = 200\n",
    "# default learning times  \n",
    "LEARNING_TIMES = 200\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "def generate_min_batch(dataset, batch_size):\n",
    "    dataset_shape = dataset.shape\n",
    "    if dataset_shape[0] < batch_size:\n",
    "        batch_size = dataset_shape[0]\n",
    "    row_indexes = [i for i in range(dataset_shape[0])]\n",
    "    random.shuffle(row_indexes)\n",
    "    random_row_indexes = row_indexes[:batch_size]\n",
    "    hsplit_indice = dataset_shape[1] - 1\n",
    "    random_dataset = (dataset[random_row_indexes, :hsplit_indice],\n",
    "                      dataset[random_row_indexes, hsplit_indice:])\n",
    "    return random_dataset\n",
    "\n",
    "\n",
    "def hypothesis(input_x, θ):\n",
    "    res = np.dot(input_x, θ.T)\n",
    "    return res\n",
    "\n",
    "\n",
    "def deviation(input_x, output_y, θ):\n",
    "    res = sum([pow(hypothesis(x, θ) - y, 2) for x, y in zip(input_x, output_y)])\n",
    "    return res / (2 * input_x.shape[0])\n",
    "\n",
    "\n",
    "# 獨立的海森函數（未知錯誤）\n",
    "# def hession(input_x,θ):\n",
    "#     for i in range(θ.shape[1]):\n",
    "#         res=np.dot(\n",
    "#             (np.array([hypothesis(row, θ) for row in input_x])),\n",
    "#             (1-(np.array([hypothesis(row, θ) for row in input_x]))))\n",
    "#         res=np.dot(res,\n",
    "#                    input_x[:, i:i+1])\n",
    "#         res=np.dot(res,\n",
    "#                    np.transpose(input_x[:,i:i+1]))\n",
    "#     print(res)\n",
    "#     return np.linalg.inv(res)\n",
    "\n",
    "def gradientDescent(dataset, batch_size, learning_rate, learning_times):\n",
    "    print(\"------Start gradient descent------\")\n",
    "    process = 0\n",
    "    FI=0\n",
    "    hsplit_indice = (dataset.shape)[1] - 1\n",
    "    θ = np.random.rand(1, hsplit_indice)\n",
    "\n",
    "    _θ = θ.copy()\n",
    "    while process < learning_times:\n",
    "        batch_data = generate_min_batch(dataset, batch_size)\n",
    "        input_x = batch_data[0]\n",
    "        output_y = batch_data[1]\n",
    "\n",
    "        if FI==0:\n",
    "            for a in range(20):\n",
    "                for i in range(θ.shape[1]):\n",
    "                    derivative = np.dot(\n",
    "                        (np.array([hypothesis(row, θ) for row in input_x]) - output_y).T,\n",
    "                        input_x[:, i:i + 1])\n",
    "                    avg_derivative = derivative / batch_size\n",
    "                    # θ = θ - αJ(θ)'\n",
    "                    _θ[0, i] = θ[0, i] - learning_rate * avg_derivative\n",
    "                θ = _θ\n",
    "        # 牛頓法 由於直接使用牛頓法無法正確收斂，故採用梯度計算輔助牛頓法\n",
    "        for i in range(θ.shape[1]):\n",
    "            derivative = np.dot(\n",
    "                (np.array([hypothesis(row, θ) for row in input_x]) - output_y).T,\n",
    "                input_x[:, i:i + 1])\n",
    "            avg_derivative = derivative / batch_size\n",
    "            # 海森矩陣（位置錯誤）\n",
    "            h2 = np.dot((np.multiply((np.reshape((input_x[:, 0]).T,\n",
    "                                                (MIN_BATCH, 1)) - np.array([hypothesis(row, θ) for row in input_x])),\n",
    "                                    input_x[:, i:i + 1])).T,\n",
    "                        (np.reshape((input_x[:, 0]).T,\n",
    "                                    (MIN_BATCH, 1))))\n",
    "            h3 = (np.array([hypothesis(row, θ) for row in input_x])) * h2\n",
    "            hession = 0\n",
    "            for z in range(batch_size):\n",
    "                hession += h3[z][0]\n",
    "            Hession = hession / batch_size\n",
    "            _θ[0, i] = θ[0, i] - avg_derivative / Hession\n",
    "        F=1\n",
    "        θ = _θ\n",
    "        # calculate deviation\n",
    "        batch_test_data = generate_min_batch(dataset, batch_size)\n",
    "        current_deviation = deviation(batch_test_data[0], batch_test_data[1], θ)\n",
    "        if (process % 10 == 0):\n",
    "            print(\"step %d, deviation %2f\" % (process, current_deviation))\n",
    "            print(\"θ: %s\" % (θ))\n",
    "        process += 1\n",
    "        # 輔助快速判斷是否學習完成\n",
    "        # if current_deviation<1e-11:\n",
    "        #     break\n",
    "\n",
    "    print(\"θ value is: %s\" % (θ))\n",
    "    print(\"------End gradient descent------\")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    parser = argparse.ArgumentParser()\n",
    "    parser.add_argument('--batch', type=int, default=MIN_BATCH)\n",
    "    parser.add_argument('--epoch', type=int, default=LEARNING_TIMES)\n",
    "    parser.add_argument('--rate', type=float, default=ALPHA)\n",
    "\n",
    "    args = parser.parse_args()\n",
    "    dataset = np.loadtxt(DATASET_FILE, dtype=np.float64)\n",
    "    gradientDescent(dataset, args.batch, args.rate, args.epoch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "由此結束"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "補充：\n",
    "LinearRegression\n",
    "　　　　损失函数：\n",
    "\n",
    "　　　　LinearRegression类就是我们平时说的最常见普通的线性回归，它的损失函数也是最简单的，如下：\n",
    "\n",
    "　　　　J(θ)=12(Xθ−Y)T(Xθ−Y)J(θ)=12(Xθ−Y)T(Xθ−Y)\n",
    "　　　　损失函数的优化方法：\n",
    "\n",
    "　　　　对于这个损失函数，一般有梯度下降法和最小二乘法两种极小化损失函数的优化方法，而scikit中的LinearRegression类用的是最小二乘法。通过最小二乘法，可以解出线性回归系数θθ为：\n",
    "\n",
    "　　　　θ=(XTX)−1XTYθ=(XTX)−1XTY\n",
    "　　　　验证方法：\n",
    "\n",
    "　　　　LinearRegression类并没有用到交叉验证之类的验证方法，需要我们自己把数据集分成训练集和测试集，然后训练优化。"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Raw Cell Format",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
